In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
In [2]:
# Violin plot of average correlations for a subset of brain networks.
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="whitegrid")
# Load the example dataset of brain network correlations
# (the first three rows form a multi-level column header; column 0 is the index)
df = sns.load_dataset("brain_networks", header=[0, 1, 2], index_col=0)
# Pull out a specific subset of networks
used_networks = [1, 3, 4, 5, 6, 7, 8, 11, 12, 13, 16, 17]
# Boolean mask over the columns: True where the "network" level, read as an
# integer, is one of used_networks
used_columns = (df.columns.get_level_values("network")
.astype(int)
.isin(used_networks))
df = df.loc[:, used_columns]
# Compute the correlation matrix and average over networks
corr_df = df.corr().groupby(level="network").mean()
# Cast the network labels to int so that sort_index orders them numerically
corr_df.index = corr_df.index.astype(int)
# Sort by network, then transpose so each network becomes a column
corr_df = corr_df.sort_index().T
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 6))
# Draw a violinplot with a narrower bandwidth than the default
# NOTE(review): newer seaborn releases appear to rename `bw` to `bw_method` --
# confirm against the installed seaborn version.
sns.violinplot(data=corr_df, palette="Set3", bw=.2, cut=1, linewidth=1)
# Finalize the figure
ax.set(ylim=(-.7, 1.05))
# Drop the left and bottom axis spines
sns.despine(left=True, bottom=True)
Scope
In [3]:
def mm(s_conc, vmax, km):
    """
    Evaluate the rate expression vmax * s / (s + km) element-wise.

    :param array-like s_conc: substrate concentrations (NumPy array, list,
        tuple or scalar; coerced with np.asarray)
    :param float vmax: maximum reaction rate
    :param float km: half substrate concentration
    :return np.array: reaction rates, same shape as s_conc
    """
    # Coerce up front so plain Python sequences work: with a list,
    # `vmax * s_conc` would repeat the list and `s_conc + km` would raise
    # TypeError. Arrays pass through unchanged.
    s_conc = np.asarray(s_conc)
    return vmax * s_conc / (s_conc + km)
In [4]:
# Substrate concentrations 0.1, 1.1, ..., 99.1
s_conc = np.arange(100) + 0.1
# Rate curve for vmax=4, km=0.4, drawn as blue dots joined by a line
plt.plot(s_conc, mm(s_conc, 4, 0.4), 'b.-')
Out[4]:
In [5]:
# One stacked subplot per (vmax, km) pair, all sharing the same axis limits
s_conc = np.arange(100) + 0.1
params = [(5, 0.5), (5, 20), (10, 0.5), (15, 20)]
ymax = max(v for (v, k) in params)  # largest vmax sets the common y-limit
nplots = len(params)
fig = plt.figure()
yticks = np.arange(0, ymax)  # not referenced again within this cell
for cur, (vmax, km) in enumerate(params, start=1):
    # Subplot positions are 1-based, hence start=1
    ax = fig.add_subplot(nplots, 1, cur)
    ax.axis([0, len(s_conc), 0, ymax])
    # Only label the bottom and top of the y-range to keep the stack uncluttered
    ax.set_yticks([0, ymax])
    ax.set_yticklabels([0, ymax])
    ax.plot(s_conc, mm(s_conc, vmax, km), 'b.-')
plt.show()
In [6]:
# Parameter plot: each (vmax, km) pair drawn as a point, km on x and vmax on y
vmax = [v for (v, k) in params]
km = [k for (v, k) in params]
# Leave a margin of 2 units beyond the largest value on each axis
x_upper = max(km) + 2
y_upper = max(vmax) + 2
plt.axis([0, x_upper, 0, y_upper])
plt.xlabel('K_M')
plt.ylabel('V_MAX')
plt.plot(km, vmax, 'bo ')
Out[6]:
There are many Python packages for visualization. We'll start with the most popular one, matplotlib, and we'll use the trip data.
In [7]:
import pandas as pd
import matplotlib.pyplot as plt
# The following ensures that the plots are in the notebook
%inline matplotlib
# We'll also use capabilities in numpy
import numpy as np
df = pd.read_csv("2015_trip_data.csv")
In [8]:
# Preview the first rows of the trip data to see which columns are available
df.head()
Out[8]:
Now let's consider the popularity of the stations.
In [9]:
# Number of trips per station, counted once by origin and once by destination.
# Series.value_counts() is used instead of the deprecated top-level
# pd.value_counts(); the resulting counts are the same.
from_counts = df['from_station_id'].value_counts()
to_counts = df['to_station_id'].value_counts()
Our initial task is comparison - which stations are most popular. A bar plot seems appropriate.
In [10]:
# Bar chart with one bar per station: number of trips that started there
from_counts.plot.bar()
Out[10]:
Now let's plot the "to" counts.
In [11]:
# Bar chart with one bar per station: number of trips that ended there
to_counts.plot.bar()
Out[11]:
We want to know if there is a general movement of bikes from one station to another. That is, are the "from" and "to" counts out of balance? This is a comparison task. One approach is to combine the two bar plots in the same figure.
In [12]:
# Three-row layout with the middle row left empty so that it acts as
# spacing between the two bar charts.
top_ax = plt.subplot(3, 1, 1)
from_counts.plot.bar(ax=top_ax)
bottom_ax = plt.subplot(3, 1, 3)
to_counts.plot.bar(ax=bottom_ax)
Out[12]:
But this is deceptive, since the two plots have different x-axes: the stations appear in a different order in each plot.
In [13]:
# Re-order the "to" counts so both charts list the stations in the same order.
# reindex aligns on station id in a single step, and a station that never
# appears as a destination gets a count of 0 instead of raising KeyError.
ordered_to_counts = to_counts.reindex(from_counts.index, fill_value=0)
# Rows 1 and 3 of a three-row layout; the empty middle row provides spacing
plt.subplot(3, 1, 1)
from_counts.plot.bar()
plt.subplot(3, 1, 3)
ordered_to_counts.plot.bar()
Out[13]:
But this is awkward, since it's difficult to find a specific station. It is preferable to sort the stations.
We'd like to compare the two counts for each station, so we need to put the x-axis in a consistent order.
In [14]:
# One row per station with its departure ("from") and arrival ("to") counts.
# Both columns are passed as raw arrays, so they are paired by position and
# labelled with the station ids taken from from_counts.
count_columns = {'from': from_counts.values, 'to': ordered_to_counts.values}
df_counts = pd.DataFrame(count_columns, index=from_counts.index)
df_counts.head()
Out[14]:
In [15]:
# Order the rows by station id. The sorted copy is bound back to the name
# rather than sorting with inplace=True, so no existing object is mutated.
df_counts = df_counts.sort_index()
df_counts.head()
Out[15]:
To find the imbalance, compare the difference between "from" and "to"
In [17]:
# Per-station difference between arrivals and departures.
# NOTE(review): this is "to" minus "from", so a positive value means more trips
# ended at the station than started there -- confirm that this is the sign
# intended for a column called 'outflow'.
net_counts = df_counts['to'] - df_counts['from']
df_outflow = net_counts.to_frame(name='outflow')
df_outflow.plot.bar(legend=False)
Out[17]:
We can make this readable by only looking at stations with large outflows, either positive or negative.
In [20]:
# Keep only the stations whose imbalance exceeds the threshold in either direction
min_outflow = 500
sel = df_outflow['outflow'].abs() > min_outflow
df_outflow_small = df_outflow.loc[sel]
df_outflow_small.plot.bar(legend=False)
Out[20]: